In [6]:
%matplotlib inline
%config InlineBackend.figure_format='retina'
from IPython.display import Image, HTML

import random
import math
import pysolar
import datetime as dt
import pytz
import matplotlib.dates as mpd
import scipy
import scipy.signal as signal
import sklearn
import numexpr as ne

from datacharm import *
from datacharm.dataplots import tableau20, ch_color

from enerpi.base import timeit
from enerpi.api import enerpi_data_catalog
from enerpi.enerplot import plot_tile_last_24h, plot_power_consumption_hourly
from enerpi.sunposition import sun_position


LAT = 38.631463
LONG = -0.866402
ELEV_M = 500
TZ = pytz.timezone('Europe/Madrid')
FS = (16, 10)
sns.set_style('ticks')
pd.set_option('display.width', 120)


def _tslim(ax, h0=12, hf=22, m0=0, mf=0):
    xlim = [mpd.num2date(x, tz=TZ) for x in ax.get_xlim()]
    new = [mpd.date2num(d.replace(hour=h, minute=m)) for d, h, m in zip(xlim, [h0, hf], [m0, mf])]
    ax.set_xlim(new)
    return ax



# Catálogo y lectura de todos los datos.
cat = enerpi_data_catalog()
data, data_s = cat.get_all_data()
print_info(data_s.tail())

LDR = pd.DataFrame(data.ldr).tz_localize(TZ)
print_cyan(LDR.describe().T.astype(int))
LDR.hist(bins=(LDR.max() - LDR.min() + 1).values[0] // 5, log=True, figsize=(18, 6), lw=0, color=tableau20[2])
plt.plot()

POWER = pd.DataFrame(data.power).tz_localize(TZ)
print_cyan(POWER.describe().T.astype(int))
POWER.hist(bins=int((POWER.max() - POWER.min() + 1).values[0] // 5), log=True, figsize=(18, 6), lw=0, color=tableau20[4])


***TIMEIT get_all_data TOOK: 2.065 s
                          kWh     t_ref  n_jump  n_exec  p_max  p_mean  p_min
ts                                                                           
2016-09-24 09:00:00  0.263024  0.999838       0       0  382.0   263.0  217.0
2016-09-24 10:00:00  0.272466  1.000088       0       0  857.0   272.0  236.0
2016-09-24 11:00:00  0.263264  0.999959       0       0  319.0   263.0  231.0
2016-09-24 12:00:00  0.288171  0.999889       0       0  909.0   288.0  235.0
2016-09-24 13:00:00  0.238090  0.828952       0       0  374.0   287.0  240.0
       count  mean  std  min  25%  50%  75%  max
ldr  3585678   249  231   11   32  161  474  748
         count  mean  std  min  25%  50%  75%   max
power  3585678   343  315  112  225  265  358  5304
Out[6]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1179db240>]], dtype=object)

In [1126]:
sns.kdeplot(POWER.power.values, shade=True, gridsize=6000)


/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j
Out[1126]:
<matplotlib.axes._subplots.AxesSubplot at 0x190d8b4e0>

In [5]:
sns.palplot(tableau20)



In [34]:
homog_power = POWER.resample('1s').mean().fillna(method='bfill', limit=3).fillna(-1) #.round().astype('int16')
homog_power.info()


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3726199 entries, 2016-08-12 10:46:25+02:00 to 2016-09-24 13:49:43+02:00
Freq: S
Data columns (total 1 columns):
power    float32
dtypes: float32(1)
memory usage: 42.6 MB

In [1268]:
POWER.power.round().value_counts()


Out[1268]:
225.0     40082
226.0     39897
227.0     39706
224.0     39609
223.0     39010
228.0     38825
222.0     38525
221.0     37295
229.0     37273
220.0     36522
230.0     35738
219.0     35256
218.0     34276
231.0     33899
217.0     32920
232.0     32222
216.0     31744
215.0     30570
233.0     30537
207.0     30457
208.0     30284
209.0     30049
214.0     30032
206.0     29996
210.0     29921
211.0     29740
213.0     29729
212.0     29493
234.0     29071
205.0     28588
          ...  
4065.0        1
4066.0        1
4069.0        1
4070.0        1
4072.0        1
4076.0        1
4083.0        1
4022.0        1
4019.0        1
4018.0        1
3972.0        1
3862.0        1
3865.0        1
3885.0        1
3893.0        1
3902.0        1
3928.0        1
3954.0        1
3963.0        1
3983.0        1
4016.0        1
3988.0        1
3995.0        1
3997.0        1
3999.0        1
4004.0        1
4005.0        1
4007.0        1
4012.0        1
112.0         1
Name: power, dtype: int64

In [41]:
power_day13 = homog_power.loc['2016-08-13']
ax = power_day13.plot(lw=1, alpha=.8)

power_day14 = homog_power.loc['2016-08-14']
power_day14.plot(ax=ax, lw=1, alpha=.8)


Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x123fc60f0>

In [30]:
power_day13.resample('1s').mean().fillna(method='bfill', limit=3).fillna(-1).round().astype(int)


Out[30]:
power    5242
dtype: int64

In [42]:
power_standby = homog_power.loc['2016-08-29']
power_standby.plot(lw=1, alpha=.8)


Out[42]:
<matplotlib.axes._subplots.AxesSubplot at 0x126dfcd30>

In [103]:
mf5 = pd.Series(signal.medfilt(power_standby.power, kernel_size=5), index=power_standby.index, name='mf_5')
mf11 = pd.Series(signal.wiener(power_standby.power, 15), index=power_standby.index, name='wiener_11')

t0, tf = '3:10', '3:20'
ax = power_standby.between_time(t0, tf).plot(lw=1)
mf5.between_time(t0, tf).plot(ax=ax)
mf11.between_time(t0, tf).plot(ax=ax)
plt.show()


t0, tf = '3:20', '3:25'
ax = power_standby.between_time(t0, tf).plot(lw=1)
mf11.between_time(t0, tf).plot(ax=ax)
mf5.between_time(t0, tf).plot(ax=ax)


Out[103]:
<matplotlib.axes._subplots.AxesSubplot at 0x14f6a8e10>

In [100]:
power_standby.hist(bins=200, lw=0, log=True, alpha=.5)
mf11.hist(bins=200, lw=0, log=True, alpha=.5)


Out[100]:
<matplotlib.axes._subplots.AxesSubplot at 0x16f3594e0>

In [104]:
N = len(power_standby)
out_fft = np.fft.rfft((power_standby.power - power_standby.power.mean()).values)
out_fft_mf = np.fft.rfft((mf5 - mf5.mean()).values)
out_fft_mf2 = np.fft.rfft((mf11 - mf11.mean()).values)

plt.figure(figsize=(18, 15))
corte = 350 # seg

plt.subplot(3, 2, 1)
plt.plot(np.abs(out_fft[:corte + 1]), lw=1, alpha=.7)

plt.subplot(3, 2, 2)
plt.plot(np.abs(out_fft[corte:N//2]), lw=1, alpha=.7)

plt.subplot(3, 2, 3)
plt.plot(np.abs(out_fft_mf[:corte + 1]), lw=1, alpha=.7)

plt.subplot(3, 2, 4)
plt.plot(np.abs(out_fft_mf[corte:N//2]), lw=1, alpha=.7)

plt.subplot(3, 2, 5)
plt.plot(np.abs(out_fft_mf2[:corte + 1]), lw=1, alpha=.7)

plt.subplot(3, 2, 6)
plt.plot(np.abs(out_fft_mf2[corte:N//2]), lw=1, alpha=.7);



In [62]:
plt.plot(np.abs(out_fft_mf2[:60]), lw=1, alpha=.7);



In [ ]:

Extracting POWER events


In [312]:
@timeit('process_power', verbose=True)
def process_power(df_homog_power, 
                  kernel_size=15, roll_window_event=9, threshold_event=10,
                  margin_td=pd.Timedelta('120s'), 
                  margin_ant=pd.Timedelta('3s')):
    """
    Tomando una pd.DataFrame de 'POWER' homogéneo (con resample mean a 1s), 
        * aplica filtrado 'Wiener' a la señal raw,
        * calcula ∆_wiener, deltas de power entre samples
        * calcula ∆'_wiener con roll_window_event centrado, detectando máximos y mínimos locales 
        en los cambios de ∆_wiener, tomándolos como eventos de cambio.
        * Agrupa los eventos de cambio en subconjuntos separados, al menos, un pd.Timedelta 'margin_td'.
        (forma el evento desde inicio - pd.Timedelta 'margin_ant')
        
    return:
        df_power, list_index_events, df_feats_grupos
    """

    def _is_event(x):
        mid = len(x) // 2
        xmin = np.min(x)
        xmax = np.max(x)
        if ((xmax - xmin) > threshold_event):
            if (x[mid] == xmin):
                return -1
            elif (x[mid] == xmax):
                return 1
        return 0

    df_power = df_homog_power.copy()
    df_power['wiener'] = signal.wiener(df_power.power, kernel_size).astype('float32')
    df_power['medfilt'] = signal.medfilt(df_power.power, kernel_size).astype('float32')
    df_power['roll_std'] = df_power.wiener.rolling(11).std().astype('float32')
    
    df_power['deltas'] = df_power.wiener.diff().fillna(0)
    df_power['eventos'] = df_power['deltas'].diff().fillna(0).rolling(roll_window_event, center=True
                                                                     ).apply(_is_event).fillna(0).astype('int16')

    list_index_events = []
    last_event_init = None
    events_in_interval = []
    for t in df_power[df_power['eventos'] != 0].index:
        if last_event_init is None:
            events_in_interval.append(t)
        elif t > last_event_init + margin_td:
            list_index_events.append(events_in_interval)
            events_in_interval = [t]
        else:
            events_in_interval.append(t)
        last_event_init = t
    print_info('Nº de grupos de eventos: {} (para un nº de eventos total de: {})'
               .format(len(list_index_events), len(df_power[df_power['eventos'] != 0])))
    
    sub_events = []
    cols = ['power', 'medfilt', 'wiener', 'deltas', 'eventos']
    cols_feat_grupos = ['index_ge', 't0', 'duracion', 'wiener_min', 'wiener_mean', 'wiener_max', 'wiener_std', 'wiener_median', 
                        'eventos_sum', 'eventos_abs_sum', 'deltas_max', 'deltas_min', 'deltas_sum', 'deltas_abs_sum']    
    for i, ts in enumerate(list_index_events):
        t0, tf = ts[0] - margin_ant, ts[-1] + margin_td
        d_i = df_power.loc[t0:tf, cols]
        resumen_i = (i, d_i.index[0], len(d_i), 
                     d_i.wiener.min(), d_i.wiener.mean(), d_i.wiener.max(), d_i.wiener.std(), d_i.wiener.median(), 
                     d_i.eventos.sum(), d_i.eventos.abs().sum(), 
                     d_i.deltas.max(), d_i.deltas.min(), d_i.deltas.sum(), d_i.deltas.abs().sum())
        sub_events.append(resumen_i)
    df_feats_grupos = pd.DataFrame(sub_events, columns=cols_feat_grupos).set_index(cols_feat_grupos[0])
    return df_power, list_index_events, df_feats_grupos


df_power, list_index_events, df_feats_grupos = process_power(power_standby)
df_feats_grupos.describe()


Nº de grupos de eventos: 17 (para un nº de eventos total de: 76)
process_power TOOK: 0.617 s
Out[312]:
duracion wiener_min wiener_mean wiener_max wiener_std wiener_median eventos_sum eventos_abs_sum deltas_max deltas_min deltas_sum deltas_abs_sum
count 17.000000 17.000000 17.000000 17.000000 17.000000 17.000000 17.000000 17.000000 17.000000 17.000000 17.000000 17.000000
mean 135.411765 199.682035 223.403936 625.716531 48.622832 217.295883 -0.411765 3.823529 300.269726 -321.312862 11.041497 917.991433
std 13.647613 7.877142 12.715661 271.574096 30.633446 10.808972 0.618347 1.878673 206.774374 216.144354 10.972383 546.265832
min 125.000000 183.800476 195.313873 199.724258 1.908688 195.211060 -1.000000 2.000000 4.854843 -538.085815 -7.720901 60.890289
25% 126.000000 191.858337 214.732132 228.876953 3.838038 206.394714 -1.000000 2.000000 6.840714 -475.280762 6.910400 210.743942
50% 133.000000 201.769867 221.992096 782.892639 65.986412 222.654984 0.000000 3.000000 332.166199 -411.596222 14.278900 1240.151489
75% 139.000000 202.746536 235.968719 797.213623 67.981680 226.001053 0.000000 5.000000 468.395691 -7.878113 19.508942 1260.545532
max 184.000000 211.386139 236.496643 833.998474 72.090804 229.557693 1.000000 9.000000 534.843872 -1.787735 25.805740 1352.484741

In [510]:
# Análisis frecuencial de eventos / comparación de fft's

# Separación de eventos por duración. KMeans++ de k clusters
k = 9
km_dur_event = KMeans(n_clusters=k, n_jobs=-1).fit(df_feats_grupos_all_2.duracion.values.reshape(-1, 1))
print_info(sorted(km_dur_event.cluster_centers_))

amplitudes = []
grupo_durac = []
for i, ts in enumerate(lista_grupos_eventos_total_2):
    t0, tf = ts[0] - margin_ant, ts[-1] + margin_td
    d_i = POWER_FILT_2.loc[t0:tf, cols]
    if len(d_i) > 5:
        w = blackman(len(d_i))
        amplitudes_i = np.sqrt(np.abs(scipy.fftpack.rfft(d_i.power * w)))[:(len(d_i) // 2)]
        #amplitudes_i = np.sqrt(np.abs(np.fft.rfft(d_i.power)))[:(len(d_i) // 2)]
        
        amplitudes.append(amplitudes_i)
        grupo_durac.append(km_dur_event.labels_[i])
        

colors = tableau20[::2][:len(km_dur_event.cluster_centers_)]
plt.figure(figsize=(18, 12))
for a, g in zip(amplitudes, grupo_durac):
    color = colors[g]
    if len(a) > 200:
        params = dict(lw=.75, alpha=.3, color=color)
    else:
        params = dict(lw=.1, alpha=.01, color=color)
    plt.plot(a[2:], **params)
plt.xlim(0, 600);


[array([ 44.27913015]), array([ 94.02152318]), array([ 206.76315789]), array([ 478.75609756]), array([ 937.25]), array([ 2040.]), array([ 3634.2]), array([ 4933.]), array([ 6104.])]

In [511]:
f, axes = plt.subplots(1, 2, figsize=(20, 12))

ymax, ne = 0, 0
for a, g in zip(amplitudes, grupo_durac):
    color = colors[g]
    if len(a) > 300:
        params = dict(lw=.75, alpha=.35, color=color)
        axes[0].plot(a[2:], **params)
        ne += 1
        ymax = max(ymax, max(a[2:]))
axes[0].annotate('{} Eventos (de ∆T > {} s)'.format(ne, 300), (2000, ymax * .75), ha='left')

ymax, ne = 0, 0
for a, g in zip(amplitudes, grupo_durac):
    color = colors[g]
    if len(a) <= 300:
        if len(a) <= 100:
            params = dict(lw=.25, alpha=.03, color=color)
        else:
            params = dict(lw=.5, alpha=.1, color=color)
        axes[1].plot(a[2:], **params)
        ne += 1
        ymax = max(ymax, max(a[2:]))
axes[1].annotate('{} Eventos (de ∆T ≤ {} s)'.format(ne, 300), (200, ymax * .75), ha='left')
axes[1].set_xlim(0, 300);



In [487]:
big_events = df_feats_grupos_all_2[df_feats_grupos_all_2.duracion > 554]
print_info('Hay {} eventos de larga duración (∆T_medio={:.1} s)\n'.format(len(big_events), big_events.duracion.mean()))

plot_mosaico_eventos(POWER_FILT_2, big_events, n_rows=7, n_cols=5, size=4)


Hay 35 eventos de larga duración (∆T_medio=1888.7428571428572)

plot_mosaico_eventos TOOK: 12.457 s

In [490]:
amplitudes = []
for t0, dur in zip(big_events.t0, big_events.duracion):
    tf = t0 + pd.Timedelta(seconds=dur) + margin_td
    t0 -= margin_ant
    d_i = POWER_FILT_2.loc[t0:tf, cols]
    if len(d_i) > 5:
        amplitudes_i = np.sqrt(np.abs(np.fft.rfft(d_i.power)))[:(len(d_i) // 2)]
        amplitudes.append(amplitudes_i)
        
        
plt.figure(figsize=(18, 12))
for a in amplitudes:
    if len(a) > 1800:
        params = dict(lw=1, alpha=.5, color='b')
    else:
        params = dict(lw=1, alpha=.3, color='r')
    plt.plot(a[2:], **params)
#plt.xlim(0, 600);



In [509]:
from scipy.signal import blackman
from scipy.signal import hanning


x_fft = POWER_FILT_2.iloc[1000000:1001000].power
w = blackman(len(x_fft))
print_red(x_fft.head(10))

plt.plot(x_fft)
plt.show()
plt.plot(np.sqrt(np.abs(scipy.fftpack.fft(x_fft * w)))[1:len(x_fft) // 2])


ts
2016-08-24 00:33:05+02:00    240.281982
2016-08-24 00:33:06+02:00    252.030426
2016-08-24 00:33:07+02:00    244.846771
2016-08-24 00:33:08+02:00    227.035446
2016-08-24 00:33:09+02:00    228.060532
2016-08-24 00:33:10+02:00    241.670258
2016-08-24 00:33:11+02:00    261.747406
2016-08-24 00:33:12+02:00    250.801636
2016-08-24 00:33:13+02:00    248.085449
2016-08-24 00:33:14+02:00    255.234711
Freq: S, Name: power, dtype: float32
Out[509]:
[<matplotlib.lines.Line2D at 0x18df07c50>]

In [495]:
#window = np.hanning(51)
window = np.kaiser(12, 14)
plt.plot(window)
#plt.title("Hann window")
plt.title("Kaiser window")
plt.ylabel("Amplitude")
plt.xlabel("Sample")
plt.show()


plt.figure()

A = np.fft.fft(window, 2048) / 25.5
mag = np.abs(np.fft.fftshift(A))
freq = np.linspace(-0.5, 0.5, len(A))
response = 20 * np.log10(mag)
response = np.clip(response, -100, 100)
plt.plot(freq, response)

plt.title("Frequency response of the Hann window")
plt.ylabel("Magnitude [dB]")
plt.xlabel("Normalized frequency [cycles per sample]")
plt.axis('tight')
plt.show()



In [ ]:


In [ ]:


In [ ]:


In [237]:
from itertools import cycle

cm = cycle(tableau20[2::2])
ax = df_power.wiener.plot(figsize=(18, 8), lw=1, color=tableau20[0], alpha=.4)

for tt in list_index_events:
    ax = df_power.wiener.loc[tt[0]:tt[-1]].plot(ax=ax, lw=1, color=next(cm), alpha=.8)


PLOT Events


In [539]:
import matplotlib.patches as mpp


@timeit('plot_power_events', verbose=True)
def plot_power_events(df_power, df_feats_grupos):
    margin_y = 50
    frac_day = 1. / (24*60*60.)
    df_feats_p = df_feats_grupos.copy()
    df_feats_p['tf'] = df_feats_p['t0'] + df_feats_p.duracion.apply(lambda x: pd.Timedelta(seconds=x))
    
    f, ax = plt.subplots(1, 1, figsize=(18, 8))
    df_power.wiener.plot(ax=ax, lw=1, color=tableau20[4], alpha=.7)
    for i, row in df_feats_p.iterrows():
        r = mpp.Rectangle((mpd.date2num(row.t0) - 15 * frac_day, row.wiener_min - margin_y), 
                          row.duracion * frac_day + 15 * frac_day, row.wiener_max - row.wiener_min + 2 * margin_y, 
                          fill=True, lw=0, alpha=.6, fc=tableau20[6])
        ax.add_patch(r)
    ax.xaxis.set_major_formatter(mpd.DateFormatter('%H:%M'))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
    ax.xaxis.set_tick_params(labelsize=10)
    ax.xaxis.set_tick_params(pad=3)
    ax.set_xlabel('')
    
    
def _plot_row_evento(df_i, ax=None, row=None, with_raw=True):
    margin_y = 50
    frac_day = 1. / (24*60*60.)
    
    if ax is None:
        _, ax = plt.subplots(1, 1)
    ax.xaxis.set_major_formatter(mpd.DateFormatter('%H:%M'))
    if with_raw:
        df_i['power'].plot(ax=ax, lw=.5, color=tableau20[0], alpha=.4)
    df_i['wiener'].plot(ax=ax, lw=1, color=tableau20[4], alpha=.8)
    if row is not None:
        r = mpp.Rectangle((mpd.date2num(row.t0) - 15 * frac_day, row.wiener_min - margin_y), 
                          row.duracion * frac_day + 15 * frac_day, row.wiener_max - row.wiener_min + 2 * margin_y, 
                          fill=True, lw=0, alpha=.6, fc=tableau20[6])
        ax.add_patch(r)
        ax.annotate('E_{}'.format(row.name), (.07, .9), xycoords='axes fraction')
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
    ax.xaxis.set_tick_params(labelsize=10)
    ax.xaxis.set_tick_params(pad=3)
    ax.set_xlabel('')
    return ax

    
@timeit('plot_mosaico_eventos', verbose=True)
def plot_mosaico_eventos(df_power, df_feats_grupos, n_rows=3, n_cols=3, size=5):
    
    margin_event = pd.Timedelta('5min')
    df_feats_p = df_feats_grupos.copy()
    df_feats_p['tf'] = df_feats_p['t0'] + df_feats_p.duracion.apply(lambda x: pd.Timedelta(seconds=x))
    
    f, axes = plt.subplots(n_rows, n_cols, figsize=(size * n_cols, size * n_rows))
    axes = axes.ravel()
    for i, (i_r, row) in enumerate(df_feats_p.iterrows()):
        if i >= len(axes):
            break
        df_i = df_power.loc[row.t0 - margin_event: row.tf + margin_event]
        ax = _plot_row_evento(df_i, axes[i], row, with_raw=True)

In [540]:
df_power, list_index_events, df_feats_grupos = process_power(homog_power.loc['2016-08-29'])

plot_power_events(df_power, df_feats_grupos)
plt.show()
                     
plot_mosaico_eventos(df_power, df_feats_grupos, n_rows=3, n_cols=3, size=4)
plt.show()
plot_mosaico_eventos(df_power, df_feats_grupos.iloc[9:], n_rows=3, n_cols=3, size=4)


Nº de grupos de eventos: 17 (para un nº de eventos total de: 76)
process_power TOOK: 0.637 s
plot_power_events TOOK: 5.784 s
plot_mosaico_eventos TOOK: 1.565 s
plot_mosaico_eventos TOOK: 1.202 s

In [300]:
df_power, list_index_events, df_feats_grupos = process_power(homog_power.loc['2016-08-13'])
plot_power_events(df_power, df_feats_grupos)
plt.show()
                     
plot_mosaico_eventos(df_power, df_feats_grupos, n_rows=5, n_cols=4, size=4)


Nº de grupos de eventos: 28 (para un nº de eventos total de: 211)
process_power TOOK: 0.558 s
plot_power_events TOOK: 4.923 s
plot_mosaico_eventos TOOK: 2.744 s

In [518]:
ax.annotate?

In [313]:
process_params = dict(kernel_size=15, roll_window_event=9, threshold_event=10,
                      margin_td=pd.Timedelta('120s'), margin_ant=pd.Timedelta('3s'))
POWER_FILT, lista_grupos_eventos_total, df_feats_grupos_all = process_power(homog_power, **process_params)
POWER_FILT.info()
df_feats_grupos_all.columns


Nº de grupos de eventos: 1174 (para un nº de eventos total de: 29587)
process_power TOOK: 26.207 s
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3726199 entries, 2016-08-12 10:46:25+02:00 to 2016-09-24 13:49:43+02:00
Freq: S
Data columns (total 5 columns):
power      float32
wiener     float32
medfilt    float32
deltas     float32
eventos    int16
dtypes: float32(4), int16(1)
memory usage: 92.4 MB
Out[313]:
Index(['t0', 'duracion', 'wiener_min', 'wiener_mean', 'wiener_max', 'wiener_std', 'wiener_median', 'eventos_sum',
       'eventos_abs_sum', 'deltas_max', 'deltas_min', 'deltas_sum', 'deltas_abs_sum'],
      dtype='object')

In [304]:
df_feats_grupos_all.duracion.hist(bins=500, lw=0, log=True)


Out[304]:
<matplotlib.axes._subplots.AxesSubplot at 0x1846e9780>

In [522]:
plot_mosaico_eventos(POWER_FILT, df_feats_grupos_all[df_feats_grupos_all.duracion > 300], 
                     n_rows=3, n_cols=3, size=4)


plot_mosaico_eventos TOOK: 1.605 s

In [321]:
process_params_2 = dict(kernel_size=11, roll_window_event=7, threshold_event=7,
                        margin_td=pd.Timedelta('30s'), margin_ant=pd.Timedelta('2s'))
POWER_FILT_2, lista_grupos_eventos_total_2, df_feats_grupos_all_2 = process_power(homog_power, **process_params_2)
df_feats_grupos_all_2.duracion.hist(bins=500, lw=0, log=True)


Nº de grupos de eventos: 3902 (para un nº de eventos total de: 49152)
process_power TOOK: 29.953 s
Out[321]:
<matplotlib.axes._subplots.AxesSubplot at 0x18fc29390>

In [527]:
plot_mosaico_eventos(POWER_FILT_2, df_feats_grupos_all_2[df_feats_grupos_all_2.duracion > 550], 
                     n_rows=7, n_cols=5, size=4)
plt.show()
df_feats_grupos_all_2[df_feats_grupos_all_2.duracion > 1000].head()


plot_mosaico_eventos TOOK: 11.402 s
Out[527]:
t0 duracion wiener_min wiener_mean wiener_max wiener_std wiener_median eventos_sum eventos_abs_sum deltas_max deltas_min deltas_sum deltas_abs_sum
index_ge
168 2016-08-15 13:31:06+02:00 1079 311.193390 1028.114746 2928.174316 696.325804 860.590393 -5 273 493.176849 -651.105957 759.866699 90460.734375
306 2016-08-17 14:59:00+02:00 1003 318.768066 2268.884521 3947.953613 777.626035 2599.294678 10 272 821.583740 -523.239136 14.204346 29463.310547
1692 2016-08-23 13:05:44+02:00 1028 368.142700 845.181152 1725.723999 525.455844 456.233032 0 264 564.151123 -649.993408 41.664764 86056.617188
1791 2016-08-24 08:46:43+02:00 4941 200.192764 665.801636 2697.358154 529.791026 548.091492 -22 1394 950.543091 -858.963745 110.653015 181397.656250
1869 2016-08-24 16:10:39+02:00 3655 270.432892 560.014343 1462.220459 111.440515 567.709839 34 1058 437.883606 -433.254639 7.465302 159333.484375

In [543]:
def _get_evento(id_ev, df_feats_grupos, df_power, verbose=False):
    df_ev_f = df_feats_grupos.loc[id_ev]
    #df_ev = df_power[df_power.intervalo == id_ev + 1]
    df_ev = df_power.loc[df_ev_f.t0:df_ev_f.t0 + pd.Timedelta(seconds=df_ev_f.duracion)]
    if verbose:
        print_info('* Evento {}. Features:\n{}\nHead:\n{}\nTail:\n{}'
                   .format(id_ev, pd.DataFrame(df_ev_f).T, df_ev.head(3), df_ev.tail(3)))
    return df_ev
    
_plot_row_evento(_get_evento(3512, df_feats_grupos_all_2, POWER_FILT_2, verbose=True), row=df_feats_grupos_all_2.loc[3512]);


* Evento 3512. Features:
                             t0 duracion wiener_min wiener_mean wiener_max wiener_std wiener_median eventos_sum  \
3512  2016-09-20 14:14:11+02:00      624    297.738     723.665     1690.4    525.971       336.845           8   

     eventos_abs_sum deltas_max deltas_min deltas_sum deltas_abs_sum  
3512             160    719.185    -684.29    12.2385        53386.1  
Head:
                                power      wiener     medfilt      deltas  eventos  intervalo
ts                                                                                           
2016-09-20 14:14:11+02:00  320.202942  324.458282  318.347992   16.946106        0       3513
2016-09-20 14:14:12+02:00  318.347992  322.300629  320.202942   -2.157654        0       3513
2016-09-20 14:14:13+02:00  778.441284  778.724243  778.441284  456.423615        1       3513
Tail:
                                power      wiener     medfilt    deltas  eventos  intervalo
ts                                                                                         
2016-09-20 14:24:33+02:00  318.290680  320.416748  319.614044 -1.239502        0       3513
2016-09-20 14:24:34+02:00  325.512299  319.750610  318.664642 -0.666138        0       3513
2016-09-20 14:24:35+02:00  325.909332  318.987091  318.664642 -0.763519        0       3513

In [613]:
grupo_1 = [167, 168, 1692, 1693, 2354, 2897, 3512, 3082]  # 3082 no es del grupo

f, axes = plt.subplots(4, 2, figsize=(8, 16))
f2, axes_2 = plt.subplots(4, 2, figsize=(8, 16))
for id_ev, ax, ax_2 in zip(grupo_1, axes.ravel(), axes_2.ravel()):
    df_i = _get_evento(id_ev, df_feats_grupos_all_2, POWER_FILT_2)
    fft_mag_i = np.sqrt(np.abs(scipy.fftpack.rfft(df_i.power)[:len(df_i) // 2]))
    
    abs_freqs = np.array(list(sorted(zip(fft_mag_i[1:], range(1, fft_mag_i.shape[0])), reverse=True)))
    #abs_freqs.sort(axis=0)
    
    #print(np.argmax(fft_mag_i[1:]), fft_mag_i[np.argmax(fft_mag_i[1:]) + 1])
    
    ax.plot(fft_mag_i, lw=1, color=tableau20[0])
    max_abs, idx_max_abs = np.max(fft_mag_i), np.argmax(fft_mag_i)
    for a, f in abs_freqs[:3,:]:
        ax.annotate('f={}\n{:.2f}'.format(int(f), a / max_abs), (f, a),
                    xytext=(60, 10), textcoords='offset points', ha='right', fontsize=8, color=tableau20[1], 
                    arrowprops=dict(arrowstyle="->",
                                connectionstyle="arc3,rad=0.3"))
    ax.annotate('Amax={:.1f}, en f={:.0f}.\n* Best 5 freqs={}'.format(max_abs, idx_max_abs, abs_freqs[:5,1]), (.04, .85),
                xycoords='axes fraction', ha='left', fontsize=12, color=tableau20[2])

    
    _plot_row_evento(df_i, ax=ax_2)



In [ ]:


In [1128]:
#autocorrelation_plot(df_i.power - df_i.power.mean()) 

sns.set_style('ticks')

def _gen_ts_sinusoidal(Ts=1, tf=3600, 
                       freqs=[1 / 300, 1 / 225, 1 / 60, 1/20], 
                       amps=[3, 7, 2, .5],
                       offsets=[3, 7, -5, .15], 
                       mean=30):
    N = tf // Ts
    tt = pd.DatetimeIndex(freq=pd.Timedelta(seconds=Ts), start=pd.Timestamp.now(tz=TZ).replace(microsecond=0), 
                          periods=N)
    t = np.linspace(0, tf, N)
    x = mean #+ np.random.random(N) * 0.3
    for a, f, o in zip(amps, freqs, offsets):
        x += a * np.cos(2*np.pi*f*t + o)
    return tt, x


def _plot_tt_fft(tt, x, df_fft, best_fft=None, log_mag=False, figsize=(8, 8)):
    # PLOT tt + fft
    fig, axes = plt.subplots(2, 1, figsize=figsize, 
                             gridspec_kw=dict(hspace=0, wspace=0, height_ratios=[.45, .55]))

    ax = axes[0]
    ax.xaxis.tick_top()
    ax.plot(tt, x, color=tableau20[4], lw=1, alpha=.8)
    ax.xaxis.set_major_formatter(mpd.DateFormatter('%H:%M:%S'))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
    ax.xaxis.set_tick_params(labelsize=8, pad=2)
    # ax.set_xlabel('Time', labelpad=1)
    ax.set_ylabel('(Power) time series', labelpad=3)
    ax.grid('on', axis='x')
    ax.set_yticks(ax.get_yticks()[1:])

    # FFT plot
    ax = axes[1]
    ax.xaxis.tick_bottom()
    df_fft.mag.plot(ax=ax, color=tableau20[0], lw=1.25, alpha=.8, label='(Mag)')
    if best_fft is not None:
        for i, (f, row) in enumerate(best_fft.iterrows()):
            fr = row.frac_mag
            disp = 1 - fr
            ax.plot([f], [row.mag], 'o', markersize=10, markeredgewidth=1.5, markeredgecolor=tableau20[2], markerfacecolor=[0, 0, 0, 0])
            ax.annotate('f=1/{:.1f}, A={:.2g}'.format(row.cycles_f, row.frac_mag), (f, row.mag),
                        #xytext=(.75 + i * .05, row.frac_mag * .9), textcoords='axes fraction', 
                        xytext=(.97, .8 - i * .075), textcoords='axes fraction', 
                        ha='right', va='bottom', fontsize=8, color=tableau20[2], 
                        arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=-0.05"))
        ax.annotate('A(f0)={:.2f}'.format(np.sqrt(z_0) / best_fft.mag.iloc[0]), (.97, .92), xycoords='axes fraction', 
                    ha='right', fontsize=10, color=tableau20[14])
    ax.set_xlabel('Freq (Hz)', labelpad=1)
    #ax.set_xlim((0, max(.1, best_fft.index.max())))
    ax.set_xlim((0, min(.1, best_fft.index.max() * 10)))
    if log_mag:
        ax.set_ylabel('Mag (20·log10|mag|)', labelpad=3)
        ax.set_ylim((-60, ax.get_ylim()[1]))
    else:
        ax.set_ylabel('Mag (√|mag|)', labelpad=3)
    #ax.set_xlim((2/N, (1/Ts)/2))
    #ax.set_xlim((2/N, .5))
    plt.setp(ax.xaxis.get_majorticklabels(), rotation=0, ha='center')
    ax.xaxis.set_tick_params(labelsize=9, pad=3)
    ax.set_yticks(ax.get_yticks()[:-1])
    #plt.show()
    return axes


@timeit('CALC FFT FEATURES', verbose=True)
def _feat_fft(tt, x, return_best=5, log_mag=False, plot=False, figsize=(8, 8), verbose=False):
    # Scipy Real FFT
    z_pure = 2 * scipy.fftpack.rfft(x) / len(x) # FFT
    y = scipy.fftpack.rfftfreq(len(x), d=(tt[1] - tt[0]).total_seconds()) # Frequency data
    #idx = np.argsort(y)
    #idx_mag = np.argsort(z)[::-1]
    
    # With pandas
    z_0 = z_pure[0]
    df = pd.DataFrame([z_pure[1:]**2, y[1:]], index=['mag', 'freq']).T.abs()
    if log_mag:
        df_fft = df.groupby('freq').sum().apply(lambda x: 20*np.log10(x))
        df_fft['frac_mag'] = (df_fft['mag'] / df_fft['mag'].max()).apply(lambda x: np.exp(x/20))
    else:
        df_fft = df.groupby('freq').sum().apply(np.sqrt)
        df_fft['frac_mag'] = df_fft['mag'] / df_fft['mag'].max()
    df_fft['cycles_f'] = [1/x for x in df_fft.index]

    if verbose:
        print(len(z_pure), sum(z_pure), sum(z_pure**2))
        print_red(df_fft.sort_values(by='mag', ascending=False).head())
    
    if log_mag:
        best_fft = df_fft.sort_values(by='mag', ascending=False).head(return_best)
    else:
        best_fft = df_fft[df_fft.frac_mag > .05].sort_values(by='mag', ascending=False).head(return_best)

    if plot:
        axes = _plot_tt_fft(tt, x, df_fft, best_fft, log_mag=log_mag, figsize=figsize)
        plt.show()
    return best_fft


#_feat_fft(*_gen_ts_sinusoidal(amps=[3, 7, 2, .5]), plot=True, figsize=(4, 4));
_feat_fft(*_gen_ts_sinusoidal(amps=[3, 7, 2, .5], tf=36000), plot=True, figsize=(4, 4));
_feat_fft(*_gen_ts_sinusoidal(amps=[3, 7, 2, .5], tf=1800), plot=True, figsize=(4, 4));


CALC FFT FEATURES TOOK: 1.695 s
CALC FFT FEATURES TOOK: 0.619 s

In [1130]:
power_standby
#df1 = _feat_fft(power_standby.index, power_standby.power, plot=True, log_mag=True, figsize=(10, 7))
df1 = _feat_fft(power_standby.index, power_standby.power, plot=True, log_mag=False, figsize=(10, 7))
df2 = _feat_fft(power_standby.index, power_standby.wiener, plot=True, log_mag=False, figsize=(10, 7))


CALC FFT FEATURES TOOK: 3.843 s
CALC FFT FEATURES TOOK: 5.027 s

In [ ]:


In [1319]:
@timeit('slice_window_fft', verbose=True)
def _slice_window_fft(data, size_w, frac_solape=.5, only_complete=True):
    
    def _feat_fft_fast(tt, x, with_freq=True):
        # Scipy Real FFT
        z_pure = scipy.fftpack.rfft(x) / len(x)
        mags = np.sqrt(z_pure[1:-1:2]**2 + z_pure[2:-1:2]**2)
        if with_freq:
            y = scipy.fftpack.rfftfreq(len(x), d=(tt[1] - tt[0]).total_seconds()) # Frequency data
            freqs = y[1:-1:2]
            return freqs, mags
        return mags
    
    N = len(data)
    start = data.index[0]
    end = data.index[-1]
    T = end - start
    Ts = data.index[1] - start
    delta = (1 - frac_solape) * size_w * Ts
    n = (T - size_w * Ts) / delta + 1
    results, freqs, init = [], None, False
    while start < end:
        data_w = data.loc[start:start + size_w * Ts - Ts]
        #print(data_w.tail(2))
        if only_complete and (data_w.shape[0] == size_w):
            if not init:
                freqs, mags = _feat_fft_fast(data_w.index, data_w.power, with_freq=True)
                init = True
            else:
                mags = _feat_fft_fast(data_w.index, data_w.power, with_freq=False)
            results.append((start, mags))
        #else:
        #    break
        start += delta
    #return pd.DataFrame(results)
    return freqs, results

    
# %timeit _slice_window_fft(power_standby, 14400, frac_solape=.75)
# 100 loops, best of 3: 10.4 ms per loop

def fft_window_power(power, delta='3h', step='15min'):
    window_delta = pd.Timedelta(delta)
    window_step = pd.Timedelta(step)
    fr_s = 1 - window_step / window_delta
    window_size = window_delta / power.index.freq
    print_cyan('Window size: {:.0f}, step: {}, (solape={:.3f})'.format(window_size, window_step, fr_s))
    freqs, results = _slice_window_fft(power, window_size, fr_s)
    return pd.DataFrame([pd.Series(data=r[1], index=freqs, name=r[0]) for r in results]).T #freqs, results

df_results = fft_window_power(power_standby, delta='3h', step='10min')
df_results_2 = fft_window_power(power_day14, delta='3h', step='10min')
df_results_3 = fft_window_power(power_day13, delta='3h', step='10min')
#fft_slice = 
#fft_slice


Window size: 10800, step: 0 days 00:10:00, (solape=0.944)
slice_window_fft TOOK: 0.075 s
Window size: 10800, step: 0 days 00:10:00, (solape=0.944)
slice_window_fft TOOK: 0.065 s
Window size: 10800, step: 0 days 00:10:00, (solape=0.944)
slice_window_fft TOOK: 0.060 s

In [1321]:
# FFT POWER

results_FFT = fft_window_power(POWER_FILT_2, delta='10min', step='5min')
print_ok(results_FFT.shape)
#results_FFT


Window size: 600, step: 0 days 00:05:00, (solape=0.500)
slice_window_fft TOOK: 4.892 s
(299, 12419)

In [1292]:
plt.figure(figsize=(16, 9))
ax = plt.subplot(121)
results_FFT.loc[:.006].plot(ax=ax, lw=.5, color='k', alpha=.01)
plt.legend([])

ax = plt.subplot(122)
results_FFT.loc[.006:].plot(ax=ax, lw=.5, color='k', alpha=.1)
plt.legend([])


Out[1292]:
<matplotlib.legend.Legend at 0x235fa6f98>

In [1296]:
results_FFT.loc[:.006].shape, results_FFT.loc[.006:].shape, results_FFT.loc[:.2].shape
#(results_FFT.loc[:.006] - results_FFT.loc[:.006].rolling(10).mean()) #.plot()


Out[1296]:
((64, 6193), (5335, 6193), (2160, 6193))

In [1329]:
from sklearn.cluster import KMeans, DBSCAN, AffinityPropagation, AgglomerativeClustering
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler


X = results_FFT.T.values
X_std = StandardScaler().fit_transform(X)

m_pca_1 = PCA(n_components=2, copy=True, whiten=True)
X_pca = m_pca_1.fit_transform(X)
m_pca_1


Out[1329]:
PCA(copy=True, n_components=2, whiten=True)

In [1332]:
len(X), len(X_pca), len(model_hd.labels_)


Out[1332]:
(12419, 12419, 24)

In [ ]:
m_pca_20 = PCA(n_components=20, copy=True, whiten=True)
X_pca20 = m_pca_20.fit_transform(X_std)
print(X_pca20.shape)


m_tsne_20 = TSNE(n_components=20)
X_tsne20 = m_tsne_20.fit_transform(X_std)
print(m_tsne_20.shape)


(12419, 20)

In [1331]:
k = 11
#labels_pca, n_clusters_pca, model_pca, X_pca_2c = pca_clustering(X, k, whiten=True)
plot_clustering_figure(X, model_hd.labels_, plot_clustering_as_voronoi, X_pca, model_pca)


/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/process_ldr.py:125: VisibleDeprecationWarning: boolean index did not match indexed array along dimension 0; dimension is 12419 but corresponding boolean dimension is 24
  ax.scatter(reduced_data[idx, 0], reduced_data[idx, 1], color='k', s=10, alpha=.7)
/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/sklearn/metrics/cluster/unsupervised.py:193: VisibleDeprecationWarning: boolean index did not match indexed array along dimension 0; dimension is 12419 but corresponding boolean dimension is 24
  a = np.mean(distances_row[mask])
/Users/uge/anaconda/envs/py35/lib/python3.5/site-packages/sklearn/metrics/cluster/unsupervised.py:219: VisibleDeprecationWarning: boolean index did not match indexed array along dimension 0; dimension is 12419 but corresponding boolean dimension is 24
  for cur_label in set(labels) if not cur_label == label])
For n_clusters = 3 The average silhouette_score is : -0.487292976993
/Users/uge/Dropbox/PYTHON/PYPROJECTS/enerpi/enerpi/process_ldr.py:87: VisibleDeprecationWarning: boolean index did not match indexed array along dimension 0; dimension is 12419 but corresponding boolean dimension is 24
  ax.plot(X[class_member_mask, 0], X[class_member_mask, 1], 'o', color=col, lw=0, markersize=size_scatter, alpha=.7)
PLOT CLUSTERING TOOK: 9.343 s

In [1287]:
ax = plt.subplot(121)
df_results_2.loc[:.006].plot(ax=ax, lw=.75, color='k', alpha=.1)
plt.legend([])

ax = plt.subplot(122)
df_results_2.plot(ax=ax, lw=.5, color='k', alpha=.005)
plt.legend([])


Out[1287]:
<matplotlib.legend.Legend at 0x1f5a33320>

In [1270]:
#pd.DataFrame([pd.Series(data=r[1], index=[1/x for x in freqs], name=r[0]) for r in results]).T.plot(lw=.75)
#pd.DataFrame([pd.Series(data=r[1], index=freqs, name=r[0].time()) for r in results]).T.loc[:.06].plot(lw=.75, alpha=.3, legend='off')

ax = plt.subplot(121)
df_results.loc[:.006].plot(ax=ax, lw=.75, color='k', alpha=.01)
plt.legend([])

ax = plt.subplot(122)
df_results.plot(ax=ax, lw=.5, color='k', alpha=.005)
plt.legend([])


Out[1270]:
<matplotlib.legend.Legend at 0x21c5a6a20>

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [1132]:
ax = plt.subplot(111)
labels_c = ['c{:02d}'.format(i + 1) for i in range(7)]
labels_mag = ['mag{:02d}'.format(i + 1) for i in range(7)]
for id_w in fft_slice.index:
    ax.plot(fft_slice.loc[id_w].loc[labels_c], fft_slice.loc[id_w].loc[labels_mag], lw=1)



In [1133]:
pca_clustering(fft_slice.drop('start', axis=1), 3)


PCA Explained variance ratio: [ 0.94434091  0.03944973] --> 0.983790637749455
Estimated number of clusters: 3
Silhouette Coefficient: 0.899
                1  0  2
Label members  15  8  1
Out[1133]:
(array([1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1,
        2], dtype=int32),
 3,
 KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10,
     n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,
     verbose=0),
 array([[-2956.1641298 ,   425.55533299],
        [-3007.54185443,   654.04292741],
        [-2097.27840382, -1017.15072571],
        [ 5165.81056636,  1117.38085794],
        [-2877.16950807,    25.80630266],
        [ 5729.78799334,  -928.92545474],
        [ 5713.63386548,  -854.76767559],
        [ 5657.82575002,  -528.39466237],
        [-2863.30057696,   -81.11564305],
        [ 5138.42045138,  1328.54383895],
        [-2091.63497367, -1086.32240548],
        [-2947.73469331,   262.13073913],
        [-2863.30119502,   -81.11841383],
        [-2742.57946578,  -419.9339487 ],
        [ 5659.83283826,  -594.17079222],
        [-2097.28047211, -1017.16484652],
        [-2038.09173807, -1320.28410759],
        [-2721.29581277,  -519.74812697],
        [ 5179.68285805,  1010.47172068],
        [ 5179.68276868,  1010.47176235],
        [-2956.50063751,   325.14775241],
        [ 1306.04814523,    91.75549543],
        [-3006.17855352,   572.22767264],
        [-7464.67322197,  1625.56240019]]))

In [1136]:
from hdbscan import HDBSCAN

X_features = fft_slice.drop('start', axis=1).values
k = 5
labels_pca, n_clusters_pca, model_pca, X_pca_2c = pca_clustering(X_features, k, whiten=True)
plot_clustering_figure(X_features, labels_pca, plot_clustering_as_voronoi, X_pca_2c, model_pca)

model_hd = HDBSCAN().fit(X_features)
plot_clustering_figure(X_pca_2c, model_hd.labels_, plot_clustering)


PCA Explained variance ratio: [ 0.94434091  0.03944973] --> 0.983790637749455
Estimated number of clusters: 5
Silhouette Coefficient: 0.838
               1  3  4  2  0
Label members  9  6  4  4  1
For n_clusters = 5 The average silhouette_score is : 0.467296424877
PLOT CLUSTERING TOOK: 4.769 s
For n_clusters = 3 The average silhouette_score is : 0.508488024416
PLOT CLUSTERING TOOK: 4.965 s

In [919]:
@timeit('local_maxima_minima', verbose=True)
def _local_max(serie, roll_w=3, verbose=False):

    def _is_local_max(x):
        #if np.max(x) == x[len(x) // 2]:
        if np.argmax(x) == len(x) // 2:
            return True
        return False

    if verbose:
        print_secc('LOCAL MAX de {} valores. Centered window of {} samples'
                   .format(len(serie), roll_w))
    maximos = serie.rolling(roll_w, center=True).apply(_is_local_max).fillna(0).astype(bool)
    if verbose:
        print_info(serie[maximos].head())
    return maximos


#cycles = [1/x for x in y]
#plt.scatter(cycles, z)
#plt.xlim(0, 310)
#cycles
#z[reversed(idx_mag)]
#np.argsort?

s = df_fft.mag
print_red(s.max())
maximos = _local_max(s, 7)

df_fft[maximos][s[maximos] / s.max() > .07]


112.190484733
local_maxima_minima TOOK: 0.004 s
Out[919]:
mag cycles_f frac_mag
freq
0.003889 112.190485 257.142857 1.000000
0.013889 10.464811 72.000000 0.093277
0.016667 66.091980 60.000000 0.589105
0.018333 9.176115 54.545455 0.081790
0.020278 8.397489 49.315068 0.074850
0.048889 27.456626 20.454545 0.244732

In [612]:
#abs_freqs = np.array(list(sorted(zip(fft_mag_i[1:], range(1, fft_mag_i.shape[0])), reverse=True)))
#abs_freqs.sort(axis=0)

#ax = _plot_row_evento(df_i)
ax = plt.subplot()
ax.plot(fft_mag_i, lw=1, color=tableau20[0])
max_abs, idx_max_abs = np.max(fft_mag_i), np.argmax(fft_mag_i)
for a, f in abs_freqs[:3,:]:
    ax.annotate('f={}\n{:.2f}'.format(int(f), a / max_abs), (f, a),
                xytext=(60, 10), textcoords='offset points', ha='right', fontsize=8, color=tableau20[1], 
                arrowprops=dict(arrowstyle="->",
                            connectionstyle="arc3,rad=0.3"))
ax.annotate('Amax={:.1f}, en f={:.0f}.\n* Best 5 freqs={}'.format(max_abs, idx_max_abs, abs_freqs[:5,1]), (.04, .85),
            xycoords='axes fraction', ha='left', fontsize=12, color=tableau20[2])


996.766
553 20
239 40
227 59
Out[612]:
<matplotlib.text.Annotation at 0x19172ae80>

In [369]:
#df_feats_grupos_all_2[df_feats_grupos_all_2.duracion > 1000]
import sklearn.cluster as cluster
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from hdbscan import HDBSCAN

cols_cluster = ['deltas_sum', 'wiener_median', 'wiener_std', 'duracion', 'wiener_min', 'wiener_mean', 'wiener_max', 
                'eventos_sum', 'eventos_abs_sum', 'deltas_max', 'deltas_min', 'deltas_abs_sum']
df_X = df_feats_grupos_all_2[cols_cluster]
X = df_X.values
X_std = StandardScaler().fit_transform(X)

In [366]:
from enerpi.process_ldr import pca_clustering, plot_clustering, plot_clustering_as_voronoi, plot_clustering_figure

k = 36
labels_pca, n_clusters_pca, model_pca, X_pca = pca_clustering(X, k, whiten=True, labels_true=None)
print(n_clusters_pca)

plot_clustering_figure(X, labels_pca, plot_clustering_as_voronoi, X_pca, model_pca)


PCA Explained variance ratio: [ 0.98799845  0.00875039] --> 0.9967488392675097
Estimated number of clusters: 36
Silhouette Coefficient: 0.653
                 16   29   0    11   28   20  15  25  5   1  ...  4   27  12  17  6   2   7   26  33  22
Label members  1072  794  756  306  126  102  90  89  88  71 ...   3   2   2   2   2   2   1   1   1   1

[1 rows x 36 columns]
36
For n_clusters = 36 The average silhouette_score is : 0.0925662184395
PLOT CLUSTERING TOOK: 13.193 s

In [375]:
model_hd = HDBSCAN(min_samples=3)
model_hd.fit(X_std)
vc = pd.Series(model_hd.labels_).value_counts()
print(len(vc) - 1, 'clusters')
print_info(vc.head())
plot_clustering_figure(X_std, model_hd.labels_, plot_clustering, model_hd)


60 clusters
 35    1406
-1      569
 22     344
 45     323
 18     199
dtype: int64
For n_clusters = 61 The average silhouette_score is : 0.214001019588
PLOT CLUSTERING TOOK: 22.389 s

In [377]:
df_feats_grupos_all_2


Out[377]:
t0 duracion wiener_min wiener_mean wiener_max wiener_std wiener_median eventos_sum eventos_abs_sum deltas_max deltas_min deltas_sum deltas_abs_sum
index_ge
0 2016-08-12 10:46:28+02:00 95 -1.000000 213.347885 331.670654 147.967381 311.287781 -1 15 271.602325 -303.736267 7.931885 710.998901
1 2016-08-12 10:52:58+02:00 70 -1.000000 184.577026 318.408142 154.389941 306.797699 -1 13 285.027222 -286.516541 1.574768 658.450684
2 2016-08-12 11:03:33+02:00 49 319.529663 366.452301 830.842773 95.759357 350.792023 -3 5 456.764282 -419.854736 30.661011 1078.235596
3 2016-08-12 11:48:39+02:00 63 336.026703 1653.730957 2285.592529 593.475410 2073.375000 0 8 519.634644 -161.533936 1741.777344 2305.727783
4 2016-08-12 11:49:43+02:00 126 315.759583 2057.140381 3449.008545 864.798098 1947.901733 2 24 811.942627 -1057.249023 -135.291748 8070.782715
5 2016-08-12 11:52:56+02:00 52 327.364227 699.996704 1953.853394 662.773528 340.282959 2 6 11.216675 -777.610291 -1589.116943 1695.518799
6 2016-08-12 13:16:49+02:00 42 318.791046 369.229706 843.292847 99.297872 346.718964 1 5 340.940002 -261.791412 25.908508 1078.473145
7 2016-08-12 14:07:03+02:00 66 384.219940 682.953674 1245.472046 386.372169 414.706238 0 10 519.228271 -350.620422 -7.729492 1905.845947
8 2016-08-12 14:09:07+02:00 48 375.858002 429.812012 869.665894 136.192613 379.317108 -1 5 241.166901 -199.529602 5.617523 1036.391113
9 2016-08-12 14:09:58+02:00 51 371.814880 425.714142 885.445129 132.240415 378.975311 1 7 291.501099 -218.297363 0.681885 1071.492920
10 2016-08-12 14:24:12+02:00 59 248.813629 1074.538818 1557.705566 585.416972 1443.249634 0 8 454.151001 -23.250610 1223.965210 1489.977905
11 2016-08-12 14:25:10+02:00 56 1542.888306 2800.488525 3274.198975 744.298705 3245.197754 1 9 806.655273 -19.217529 1725.098389 1886.079590
12 2016-08-12 14:26:33+02:00 144 236.715973 1905.867798 3226.768311 1090.469681 2269.698730 3 25 174.371231 -704.422913 -2586.508301 3848.562012
13 2016-08-12 14:28:56+02:00 65 216.450455 356.599945 638.983398 183.108201 230.739151 0 10 176.961029 -206.506744 -404.221039 1344.700439
14 2016-08-12 14:41:52+02:00 51 334.695953 390.339203 826.116699 87.140935 375.771729 -1 7 367.862610 -392.089020 37.880768 1016.336548
15 2016-08-12 14:43:50+02:00 35 373.910248 380.754242 389.752380 4.202236 381.459747 0 2 3.473328 -4.429138 -2.711945 41.629852
16 2016-08-12 14:46:51+02:00 35 371.504730 377.976135 386.376312 3.538253 378.242493 0 2 3.331726 -2.428894 12.690826 46.098175
17 2016-08-12 15:01:52+02:00 42 450.990936 1252.200073 1437.817261 360.716193 1420.174194 0 4 497.189880 -10.725220 965.856628 1062.635864
18 2016-08-12 15:02:58+02:00 97 371.360260 802.468994 1470.946899 472.526140 481.057617 1 15 489.471558 -476.356262 -1033.641113 3329.862549
19 2016-08-12 15:25:58+02:00 35 364.482056 376.705566 388.484711 7.805982 374.783264 0 2 3.971161 -7.397430 -0.144897 54.063721
20 2016-08-12 17:12:10+02:00 42 335.102753 382.452057 874.604919 105.449887 357.126099 0 4 455.121918 -367.831207 19.054596 1114.985596
21 2016-08-12 18:58:08+02:00 35 347.219696 408.801270 444.620605 35.493184 426.421112 0 2 4.650177 -10.067200 -73.895599 125.814240
22 2016-08-12 19:08:24+02:00 50 338.458160 406.577576 880.766785 92.206725 396.892975 1 7 340.790619 -289.284973 49.797638 1124.625488
23 2016-08-12 21:01:46+02:00 49 352.432190 393.919281 871.348145 94.194742 370.082733 -1 7 324.385498 -382.312195 8.053436 1103.775269
24 2016-08-12 21:04:07+02:00 37 354.936371 359.749908 365.325928 3.109157 359.854736 1 3 3.082458 -2.559723 10.809021 43.089661
25 2016-08-12 21:05:29+02:00 470 337.155060 706.560059 2073.625977 652.253354 359.603149 -3 79 877.129700 -856.046875 -15.114960 33773.367188
26 2016-08-12 23:00:30+02:00 43 220.017273 270.649292 794.580933 111.582850 244.588425 -1 5 473.026550 -340.667206 21.786911 1208.469360
27 2016-08-13 00:10:06+02:00 39 255.999695 285.544739 318.496918 14.362259 279.889313 -1 3 13.366547 -5.461639 36.524048 124.573547
28 2016-08-13 00:29:23+02:00 35 236.687790 257.781555 337.598267 33.813673 237.901108 0 2 4.311676 -10.552582 -94.971512 110.793167
29 2016-08-13 00:44:33+02:00 47 234.620346 283.126282 788.902649 101.533034 259.955444 1 7 386.108459 -281.935364 25.037567 1126.479248
... ... ... ... ... ... ... ... ... ... ... ... ... ...
3872 2016-09-23 22:03:24+02:00 284 291.268921 344.926880 390.529663 33.053171 362.545532 -5 47 5.950043 -10.809509 -84.978424 597.297241
3873 2016-09-23 22:08:13+02:00 35 295.740692 306.341644 320.320557 7.895194 303.713654 0 2 5.081970 -4.617889 -2.359253 63.257629
3874 2016-09-23 22:09:28+02:00 38 304.754242 1556.547852 1799.972534 517.742214 1782.574707 -1 3 675.955078 -20.276489 1485.841553 1572.636841
3875 2016-09-23 22:10:07+02:00 105 290.419006 455.622742 1794.433960 447.265616 300.009460 1 13 11.421387 -712.152283 -1497.468506 1657.244873
3876 2016-09-23 22:12:10+02:00 65 284.832031 299.141144 307.782410 6.045718 299.984589 1 7 5.512939 -4.222626 14.158783 110.175812
3877 2016-09-23 22:15:50+02:00 35 288.301331 298.719879 306.036774 5.156149 300.090210 0 2 4.178406 -3.948914 1.638702 59.262543
3878 2016-09-23 22:21:56+02:00 45 296.596619 389.468872 906.688782 107.435537 371.189941 -1 5 331.988220 -410.212616 74.200195 1304.141357
3879 2016-09-23 22:30:18+02:00 35 349.858093 358.982086 370.358856 6.158542 359.473938 0 2 4.457825 -3.603302 -6.530182 59.399139
3880 2016-09-23 22:32:05+02:00 78 345.611420 384.811615 433.539093 28.508431 369.958282 -2 10 10.666504 -6.104034 59.828400 217.505707
3881 2016-09-23 22:35:36+02:00 35 398.000488 407.302734 412.446503 3.438697 408.265381 0 2 4.295837 -2.080688 8.877777 43.291840
3882 2016-09-23 22:56:51+02:00 35 345.957825 354.440399 363.345306 5.298645 355.038483 0 2 4.525940 -3.471588 10.270721 50.880157
3883 2016-09-23 23:01:05+02:00 34 348.263184 359.782806 372.928864 8.252178 355.215698 0 2 2.833252 -4.972382 -14.624878 49.518738
3884 2016-09-23 23:01:57+02:00 46 351.337769 358.048187 370.553131 4.818839 357.312317 -1 5 4.000366 -4.496765 -5.907990 79.472626
3885 2016-09-23 23:03:38+02:00 38 350.332245 356.863068 366.159485 3.807837 356.932373 1 3 4.713593 -3.772919 -1.208740 56.802490
3886 2016-09-23 23:05:15+02:00 55 348.084229 355.676941 365.686401 3.458085 354.503510 1 5 4.029816 -3.740112 -15.858185 74.805084
3887 2016-09-23 23:06:24+02:00 34 346.651398 353.460724 361.233307 3.820624 352.625427 0 2 2.885590 -5.087524 -5.051910 46.446747
3888 2016-09-23 23:09:15+02:00 37 343.037262 349.416260 357.749451 3.951957 348.498016 1 3 3.314087 -2.574921 0.543579 46.025146
3889 2016-09-24 00:17:37+02:00 44 237.924332 257.133575 344.056427 31.076554 242.726822 0 4 2.393616 -23.222443 -102.177872 134.767746
3890 2016-09-24 00:29:06+02:00 47 232.126434 285.358063 805.517151 104.520683 262.475403 1 7 374.178101 -311.240051 30.615143 1155.831299
3891 2016-09-24 01:01:58+02:00 35 235.381531 246.109741 256.410614 5.551030 245.011688 0 2 4.022339 -3.267914 4.749435 53.244186
3892 2016-09-24 02:27:40+02:00 47 219.377304 268.780884 820.138306 111.161627 244.288788 0 6 299.392639 -422.070404 21.575333 1241.129150
3893 2016-09-24 02:36:22+02:00 37 232.821579 238.722519 244.304535 3.150476 237.685974 -1 3 3.266190 -3.695953 -0.554337 43.784988
3894 2016-09-24 04:29:02+02:00 50 201.390900 243.853912 794.161926 107.793400 219.857605 0 8 296.787842 -432.986023 21.452682 1236.305542
3895 2016-09-24 06:34:29+02:00 46 199.956360 249.113510 799.321350 113.201800 225.011490 0 6 325.853851 -456.355469 22.255005 1230.104248
3896 2016-09-24 08:22:55+02:00 61 183.987488 1633.567261 2581.323730 569.852613 1942.151245 0 10 557.690796 -409.314941 1753.809326 3149.048828
3897 2016-09-24 08:24:12+02:00 44 972.826904 1131.491699 1947.510254 340.859037 983.071289 0 4 11.307068 -484.373413 -964.800293 1049.698975
3898 2016-09-24 08:25:21+02:00 122 185.840225 1667.155151 3336.057617 1193.864366 1562.913574 -1 23 437.185791 -677.279419 -784.487000 5894.453125
3899 2016-09-24 08:36:19+02:00 48 179.805695 273.438446 838.390625 119.694354 255.834213 0 6 495.703217 -525.354736 68.672318 1361.744141
3900 2016-09-24 10:27:50+02:00 49 253.827408 315.619141 833.312988 102.740063 294.286621 0 6 475.860992 -362.877838 51.816315 1259.275757
3901 2016-09-24 12:52:47+02:00 46 263.663971 323.428314 887.465454 120.130861 288.360535 -1 5 465.940887 -490.882324 2.905640 1312.765869

3902 rows × 13 columns


In [380]:
POWER_FILT_2['intervalo'] = 0
POWER_FILT_2.loc[pd.DatetimeIndex(df_feats_grupos_all_2.t0), 'intervalo'] = 1
POWER_FILT_2['intervalo'] = POWER_FILT_2['intervalo'].cumsum()
POWER_FILT_2


Out[380]:
power wiener medfilt deltas eventos intervalo
ts
2016-08-12 10:46:25+02:00 321.977661 308.650421 306.766022 0.000000 0 0
2016-08-12 10:46:26+02:00 321.467957 309.759857 306.766022 1.109436 0 0
2016-08-12 10:46:27+02:00 321.467957 310.723175 306.766022 0.963318 0 0
2016-08-12 10:46:28+02:00 312.116974 303.282471 306.766022 -7.440704 0 1
2016-08-12 10:46:29+02:00 306.766022 300.141235 310.393005 -3.141235 0 1
2016-08-12 10:46:30+02:00 310.393005 311.287781 311.656708 11.146545 1 1
2016-08-12 10:46:31+02:00 304.283630 310.553619 311.656708 -0.734161 -1 1
2016-08-12 10:46:32+02:00 297.700317 309.260498 310.393005 -1.293121 0 1
2016-08-12 10:46:33+02:00 300.129700 307.857758 307.243713 -1.402740 0 1
2016-08-12 10:46:34+02:00 311.656708 307.926208 307.243713 0.068451 1 1
2016-08-12 10:46:35+02:00 316.205658 309.283875 310.393005 1.357666 0 1
2016-08-12 10:46:36+02:00 313.901764 309.797272 311.656708 0.513397 0 1
2016-08-12 10:46:37+02:00 307.243713 309.818146 311.656708 0.020874 0 1
2016-08-12 10:46:38+02:00 306.037811 311.205261 312.869995 1.387115 0 1
2016-08-12 10:46:39+02:00 312.869995 313.133575 312.958618 1.928314 0 1
2016-08-12 10:46:40+02:00 321.700439 314.464050 313.901764 1.330475 0 1
2016-08-12 10:46:41+02:00 316.040131 315.646576 313.901764 1.182526 0 1
2016-08-12 10:46:42+02:00 304.513336 317.307404 316.040131 1.660828 0 1
2016-08-12 10:46:43+02:00 312.958618 319.688416 321.341064 2.381012 0 1
2016-08-12 10:46:44+02:00 321.341064 321.928955 321.700439 2.240540 0 1
2016-08-12 10:46:45+02:00 326.292053 323.831604 326.292053 1.902649 0 1
2016-08-12 10:46:46+02:00 329.213257 325.345337 329.213257 1.513733 0 1
2016-08-12 10:46:47+02:00 332.171112 327.415283 330.683929 2.069946 0 1
2016-08-12 10:46:48+02:00 333.434723 329.848053 331.273651 2.432770 0 1
2016-08-12 10:46:49+02:00 330.683929 331.210602 331.273651 1.362549 0 1
2016-08-12 10:46:50+02:00 333.799042 331.670654 331.273651 0.460052 0 1
2016-08-12 10:46:51+02:00 338.351501 329.626648 331.273651 -2.044006 0 1
2016-08-12 10:46:52+02:00 338.809479 329.919128 331.273651 0.292480 0 1
2016-08-12 10:46:53+02:00 331.273651 322.339386 330.683929 -7.579742 0 1
2016-08-12 10:46:54+02:00 327.946960 317.968414 327.946960 -4.370972 0 1
... ... ... ... ... ... ...
2016-09-24 13:49:14+02:00 270.005371 275.051300 272.791046 0.093475 0 3902
2016-09-24 13:49:15+02:00 274.088257 274.479584 272.791046 -0.571716 0 3902
2016-09-24 13:49:16+02:00 283.387878 273.830322 272.791046 -0.649261 0 3902
2016-09-24 13:49:17+02:00 288.966187 273.658997 272.791046 -0.171326 0 3902
2016-09-24 13:49:18+02:00 279.765900 273.628174 272.791046 -0.030823 0 3902
2016-09-24 13:49:19+02:00 272.791046 273.750000 272.791046 0.121826 0 3902
2016-09-24 13:49:20+02:00 264.038208 274.555603 274.088257 0.805603 0 3902
2016-09-24 13:49:21+02:00 264.911835 276.254364 274.374512 1.698761 0 3902
2016-09-24 13:49:22+02:00 274.374512 277.507385 274.374512 1.253021 0 3902
2016-09-24 13:49:23+02:00 271.946411 278.033966 274.374512 0.526581 0 3902
2016-09-24 13:49:24+02:00 266.974426 279.658325 274.374512 1.624359 0 3902
2016-09-24 13:49:25+02:00 278.867065 281.782288 278.867065 2.123962 0 3902
2016-09-24 13:49:26+02:00 292.774689 283.528290 283.244171 1.746002 0 3902
2016-09-24 13:49:27+02:00 297.170990 283.814087 283.244171 0.285797 0 3902
2016-09-24 13:49:28+02:00 294.758423 282.819092 283.244171 -0.994995 0 3902
2016-09-24 13:49:29+02:00 297.633972 282.673859 283.244171 -0.145233 0 3902
2016-09-24 13:49:30+02:00 296.154541 283.480896 283.244171 0.807037 0 3902
2016-09-24 13:49:31+02:00 283.244171 283.515594 283.244171 0.034698 0 3902
2016-09-24 13:49:32+02:00 268.055878 283.086121 283.244171 -0.429474 0 3902
2016-09-24 13:49:33+02:00 263.429413 282.510132 283.244171 -0.575989 0 3902
2016-09-24 13:49:34+02:00 270.348938 280.771179 279.248779 -1.738953 0 3902
2016-09-24 13:49:35+02:00 275.851868 278.107239 275.851868 -2.663940 0 3902
2016-09-24 13:49:36+02:00 279.248779 275.823486 275.630096 -2.283752 0 3902
2016-09-24 13:49:37+02:00 288.050507 275.330994 275.630096 -0.492493 0 3902
2016-09-24 13:49:38+02:00 290.834900 277.369446 275.851868 2.038452 0 3902
2016-09-24 13:49:39+02:00 275.630096 268.087372 275.851868 -9.282074 -1 3902
2016-09-24 13:49:40+02:00 268.330566 260.896027 275.851868 -7.191345 0 3902
2016-09-24 13:49:41+02:00 271.033173 261.555847 275.630096 0.659821 0 3902
2016-09-24 13:49:42+02:00 277.826752 265.823120 271.033173 4.267273 0 3902
2016-09-24 13:49:43+02:00 290.478882 274.748108 268.330566 8.924988 0 3902

3726199 rows × 6 columns


In [385]:
gb_int = POWER_FILT_2.groupby('intervalo')
gb_int.wiener.count().hist(log=True, bins=500, lw=0)


Out[385]:
<matplotlib.axes._subplots.AxesSubplot at 0x196c94438>

In [387]:
duracion_int = gb_int.wiener.count()
duracion_int[duracion_int > 10000]


Out[387]:
intervalo
86      10510
2296    83331
2553    10861
Name: wiener, dtype: int64

In [396]:
POWER_FILT_2.iloc[:100000].wiener.rolling(10).std().plot()


Out[396]:
<matplotlib.axes._subplots.AxesSubplot at 0x18b07c748>

In [1]:
#POWER_FILT_2[POWER_FILT_2.intervalo == 2297]

In [266]:
bootstrap_plot(df_power.wiener, samples=100)


Out[266]:

In [250]:
#ax.add_patch?
mpp.Rectangle?

In [ ]:


In [72]:
b, a = signal.butter(4, 100, 'low', analog=True)
w, h = signal.freqs(b, a)
plt.semilogx(w, 20 * np.log10(abs(h)))
plt.title('Butterworth filter frequency response')
plt.xlabel('Frequency [radians / second]')
plt.ylabel('Amplitude [dB]')
plt.margins(0, 0.1)
plt.grid(which='both', axis='both')
plt.axvline(100, color='green') # cutoff frequency
plt.show()



In [73]:
signal.butter?

In [106]:
plt.figure(figsize=(18, 5))
t0, tf = 4600, 4800
plt.plot(power_standby.power.iloc[t0:tf].values, lw=2, alpha=.7, color='blue')
#plt.plot(signal.medfilt(power_standby.power, 3)[t0:tf], lw=1, alpha=.7, color='orange')
plt.plot(signal.wiener(power_standby.power, 15)[t0:tf], lw=2, alpha=.7, color='red')


Out[106]:
[<matplotlib.lines.Line2D at 0x166393630>]

In [107]:
power_standby['medfilt'] = signal.medfilt(power_standby.power, 15)
power_standby['wiener'] = signal.wiener(power_standby.power, 15)
power_standby.plot(lw=1, alpha=.7)


Out[107]:
<matplotlib.axes._subplots.AxesSubplot at 0x16ac948d0>

In [108]:
power_standby.iloc[t0:tf].plot(lw=1, alpha=.7)


Out[108]:
<matplotlib.axes._subplots.AxesSubplot at 0x16621fc88>

In [109]:
power_standby.wiener.plot(lw=1, alpha=.7)


Out[109]:
<matplotlib.axes._subplots.AxesSubplot at 0x166329be0>

In [114]:
power_standby.wiener.diff().fillna(0).abs().hist(bins=500, log=True)


Out[114]:
<matplotlib.axes._subplots.AxesSubplot at 0x150678f98>

In [128]:
power_standby['event'] = False
power_standby.loc[power_standby.wiener.diff().fillna(0).abs() > 7, 'event'] = True
ax = power_standby.wiener.plot()
ax.scatter(power_standby[power_standby.event].index, power_standby[power_standby.event].wiener, color='red')


Out[128]:
<matplotlib.collections.PathCollection at 0x15fa70a20>

In [136]:
deltas = power_standby.diff().fillna(0)
deltas.wiener.rolling(3).mean() #apply(lambda x: np.cumsum(x))


Out[136]:
ts
2016-08-29 00:00:00+02:00         NaN
2016-08-29 00:00:01+02:00         NaN
2016-08-29 00:00:02+02:00   -0.141764
2016-08-29 00:00:03+02:00    0.181305
2016-08-29 00:00:04+02:00   -0.288992
2016-08-29 00:00:05+02:00    2.168846
2016-08-29 00:00:06+02:00    2.571614
2016-08-29 00:00:07+02:00    1.198023
2016-08-29 00:00:08+02:00   -0.529463
2016-08-29 00:00:09+02:00   -1.175400
2016-08-29 00:00:10+02:00    0.407990
2016-08-29 00:00:11+02:00    0.529989
2016-08-29 00:00:12+02:00    0.489961
2016-08-29 00:00:13+02:00    0.182679
2016-08-29 00:00:14+02:00   -0.024249
2016-08-29 00:00:15+02:00   -0.037511
2016-08-29 00:00:16+02:00    0.094967
2016-08-29 00:00:17+02:00    0.303784
2016-08-29 00:00:18+02:00    0.341208
2016-08-29 00:00:19+02:00    0.203632
2016-08-29 00:00:20+02:00   -0.080285
2016-08-29 00:00:21+02:00   -0.389639
2016-08-29 00:00:22+02:00   -0.565366
2016-08-29 00:00:23+02:00   -0.469247
2016-08-29 00:00:24+02:00   -0.368773
2016-08-29 00:00:25+02:00   -0.536479
2016-08-29 00:00:26+02:00   -0.610418
2016-08-29 00:00:27+02:00   -0.380501
2016-08-29 00:00:28+02:00   -0.222847
2016-08-29 00:00:29+02:00   -0.221983
                               ...   
2016-08-29 23:59:30+02:00   -0.250260
2016-08-29 23:59:31+02:00   -0.059527
2016-08-29 23:59:32+02:00    0.067275
2016-08-29 23:59:33+02:00    0.132010
2016-08-29 23:59:34+02:00   -0.012532
2016-08-29 23:59:35+02:00   -0.221261
2016-08-29 23:59:36+02:00   -0.282731
2016-08-29 23:59:37+02:00   -0.128315
2016-08-29 23:59:38+02:00   -0.000819
2016-08-29 23:59:39+02:00   -0.037807
2016-08-29 23:59:40+02:00    0.024983
2016-08-29 23:59:41+02:00    0.063625
2016-08-29 23:59:42+02:00    0.068942
2016-08-29 23:59:43+02:00    0.006098
2016-08-29 23:59:44+02:00   -0.001053
2016-08-29 23:59:45+02:00    0.076198
2016-08-29 23:59:46+02:00    0.123551
2016-08-29 23:59:47+02:00    0.130459
2016-08-29 23:59:48+02:00    0.050275
2016-08-29 23:59:49+02:00    0.179012
2016-08-29 23:59:50+02:00    0.526245
2016-08-29 23:59:51+02:00    0.782264
2016-08-29 23:59:52+02:00    0.770474
2016-08-29 23:59:53+02:00    2.150396
2016-08-29 23:59:54+02:00    1.716787
2016-08-29 23:59:55+02:00   -0.416971
2016-08-29 23:59:56+02:00   -1.193072
2016-08-29 23:59:57+02:00    0.579336
2016-08-29 23:59:58+02:00    1.518169
2016-08-29 23:59:59+02:00    0.077971
Freq: S, Name: wiener, dtype: float64

In [146]:
cumsum_event = 0
s_cumsum = []
s_event = []
for v in deltas.wiener:
    if abs(v) > 10 and not s_event[-1]:
        cumsum_event = v
        s_event.append(True)
    else:
        s_event.append(False)
        cumsum_event += v
    s_cumsum.append(cumsum_event)
pd.DataFrame([s_cumsum, s_event], index=['event_acum', 'is_event']).T.plot()


Out[146]:
<matplotlib.axes._subplots.AxesSubplot at 0x15018b160>

In [169]:
THRESHOLD_EVENT = 10

def _is_event(x):
    mid = len(x) // 2
    xmin = np.min(x)
    xmax = np.max(x)
    if ((xmax - xmin) > THRESHOLD_EVENT):
        if (x[mid] == xmin):
            return -1
        elif (x[mid] == xmax):
            return 1
    return 0


eventos = deltas.wiener.diff().fillna(0).rolling(9, center=True).apply(_is_event).fillna(0)
eventos.plot(lw=1)
plt.show()
eventos.cumsum().plot(lw=1)


Out[169]:
<matplotlib.axes._subplots.AxesSubplot at 0x17ec80dd8>

In [170]:
f, ax = plt.subplots(1, 1, figsize=(18, 6))
ax = power_standby.wiener.iloc[t0:tf].plot(lw=2, color=tableau20[4])
ax2 = plt.twinx(ax)
ax2 = eventos.iloc[t0:tf].plot(ax=ax2, lw=1, color=tableau20[6])
ax.set_ylim((190, 250))


Out[170]:
(190, 250)

In [174]:
eventos.abs().resample('5min').sum().plot()


Out[174]:
<matplotlib.axes._subplots.AxesSubplot at 0x17efa70b8>

In [187]:
tt_ev = eventos[eventos != 0].index
tt_ev = pd.Index(tt_ev[1:].values - tt_ev[:-1].values)
tt_ev


Out[187]:
TimedeltaIndex(['00:00:02', '01:17:41', '00:00:05', '00:00:02', '00:00:08', '00:00:01', '01:54:55', '00:00:02',
                '00:00:06', '00:00:01', '00:00:04', '01:55:37', '00:00:02', '01:55:21', '00:00:01', '00:00:05',
                '00:00:02', '00:00:07', '00:00:01', '01:55:25', '00:00:02', '00:00:06', '00:00:02', '00:18:57',
                '00:00:03', '00:00:02', '01:35:58', '00:00:06', '00:00:02', '00:00:06', '00:00:01', '01:54:47',
                '00:00:02', '01:56:09', '00:00:06', '00:00:02', '01:25:54', '00:00:02', '00:27:56', '00:00:07',
                '00:00:02', '00:00:05', '00:00:02', '01:55:18', '00:00:02', '00:00:07', '00:43:58', '00:00:01',
                '01:12:25', '00:00:06', '00:00:02', '00:30:09', '00:00:04', '00:00:06', '00:00:01', '00:00:04',
                '00:00:02', '00:00:36', '00:00:03', '00:00:04', '01:25:47', '00:00:01', '00:00:06', '00:00:02',
                '00:32:32', '00:00:05', '00:00:01', '00:01:18', '00:00:05', '00:00:25', '00:00:01', '00:00:07',
                '00:00:03', '00:00:02', '00:00:29'],
               dtype='timedelta64[ns]', freq=None)

In [ ]:


In [ ]:


In [ ]:


In [205]:
deltas = power_standby.diff().fillna(0)

#pd.Index(tt_ev[1:].values - tt_ev[:-1].values)
#tt_ev
margin_td = pd.Timedelta('120s')

t0 = eventos.index[0]
list_index_events = []
last_event_init = None
events_in_interval = []
for t in eventos[eventos != 0].index:
    if last_event_init is None:
        last_event_init = t
        events_in_interval.append(t)
    elif t > last_event_init + margin_td:
        list_index_events.append(events_in_interval)
        events_in_interval = [t]
        last_event_init = t
    else:
        events_in_interval.append(t)
        last_event_init = t
    
len(list_index_events), list_index_events

sub_events = []
margin_ant = pd.Timedelta('5s')
cols = ['power', 'medfilt', 'wiener']
for ts in list_index_events:
    print_red(ts)
    t0, tf = ts[0] - margin_ant, ts[-1] + margin_td
    
    d_i = deltas.loc[t0:tf, cols].rename(columns)
    p_i = power_standby.loc[t0:tf, cols]
    sec_i = eventos.loc[t0:tf]
    print_cyan(d_i)
    print_info(p_i)
    print_yellow(sec_i)
    break


[Timestamp('2016-08-29 00:00:05+0200', tz='Europe/Madrid'), Timestamp('2016-08-29 00:00:07+0200', tz='Europe/Madrid')]
                               power   medfilt    wiener  event
ts                                                             
2016-08-29 00:00:00+02:00   0.000000  0.000000  0.000000      0
2016-08-29 00:00:01+02:00   1.601990  0.624222  1.692263  False
2016-08-29 00:00:02+02:00  -2.226212  0.283005 -2.117555  False
2016-08-29 00:00:03+02:00   0.907227  0.215881  0.969206  False
2016-08-29 00:00:04+02:00   0.215881  1.103104  0.281374  False
2016-08-29 00:00:05+02:00   5.296326  2.875626  5.255960  False
2016-08-29 00:00:06+02:00   2.266464  0.000000  2.177510  False
2016-08-29 00:00:07+02:00  -2.914139  0.000000 -3.839400  False
2016-08-29 00:00:08+02:00   0.623169  0.000000  0.073502  False
2016-08-29 00:00:09+02:00  -1.234161  0.058929  0.239698  False
2016-08-29 00:00:10+02:00  -0.058929  0.610992  0.910770  False
2016-08-29 00:00:11+02:00   2.503052  0.049927  0.439498  False
2016-08-29 00:00:12+02:00   0.582794  0.000000  0.119615  False
2016-08-29 00:00:13+02:00  -5.387100  0.000000 -0.011076  False
2016-08-29 00:00:14+02:00  -1.369537  0.000000 -0.181286  False
2016-08-29 00:00:15+02:00   0.295700  0.144928  0.079829  False
2016-08-29 00:00:16+02:00   4.094940  0.000000  0.386357  False
2016-08-29 00:00:17+02:00   7.839859  0.286682  0.445166  False
2016-08-29 00:00:18+02:00  -6.161850  0.715897  0.192100  False
2016-08-29 00:00:19+02:00  -4.582367  0.000000 -0.026370  False
2016-08-29 00:00:20+02:00   3.335968 -0.715897 -0.406585  False
2016-08-29 00:00:21+02:00  -0.286682  0.000000 -0.735961  False
2016-08-29 00:00:22+02:00   1.002579  0.000000 -0.553552  False
2016-08-29 00:00:23+02:00   5.221085  0.000000 -0.118229  False
2016-08-29 00:00:24+02:00  -0.352020  0.000000 -0.434539  False
2016-08-29 00:00:25+02:00  -3.854919 -0.286682 -1.056668  False
2016-08-29 00:00:26+02:00  -0.774002 -3.049286 -0.340048  False
2016-08-29 00:00:27+02:00  -5.120422  3.049286  0.255214  False
2016-08-29 00:00:28+02:00 -10.327744 -3.567642 -0.583709  False
2016-08-29 00:00:29+02:00   1.366592 -0.310059 -0.337454  False
...                              ...       ...       ...    ...
2016-08-29 00:01:38+02:00   5.136490  0.000000  0.105797  False
2016-08-29 00:01:39+02:00  -5.572098 -2.285934 -0.339360  False
2016-08-29 00:01:40+02:00  -1.154633  0.000000 -0.437210  False
2016-08-29 00:01:41+02:00  -3.290878  0.000000  0.370010  False
2016-08-29 00:01:42+02:00   0.000000  0.000000  0.663905  False
2016-08-29 00:01:43+02:00  -7.788620  0.000000  0.400643  False
2016-08-29 00:01:44+02:00   6.469040  0.000000  0.099341  False
2016-08-29 00:01:45+02:00  -0.985077  0.000000  0.026277  False
2016-08-29 00:01:46+02:00  -0.404739  0.000000 -0.850450  False
2016-08-29 00:01:47+02:00   2.952057 -0.147263 -1.200804  False
2016-08-29 00:01:48+02:00  11.051376 -1.003799 -0.712373  False
2016-08-29 00:01:49+02:00   0.950470  0.000000  0.169020  False
2016-08-29 00:01:50+02:00 -12.391769  0.000000  0.185265  False
2016-08-29 00:01:51+02:00  -1.003799  1.003799  0.511248  False
2016-08-29 00:01:52+02:00   6.426331  0.027359  0.098980  False
2016-08-29 00:01:53+02:00  -8.014404  0.285019  0.519655  False
2016-08-29 00:01:54+02:00 -10.827408  0.077545  0.815539  False
2016-08-29 00:01:55+02:00   6.171829  2.292633  0.168658  False
2016-08-29 00:01:56+02:00   9.930008 -0.094055 -0.590187  False
2016-08-29 00:01:57+02:00   0.243683 -2.276123 -0.970211  False
2016-08-29 00:01:58+02:00  -2.898880  0.000000 -0.519537  False
2016-08-29 00:01:59+02:00   0.285019  0.000000 -0.851660  False
2016-08-29 00:02:00+02:00   5.325058 -0.285019 -0.787157  False
2016-08-29 00:02:01+02:00   4.033508  0.000000 -0.130718  False
2016-08-29 00:02:02+02:00  -6.751144  0.000000  0.569935  False
2016-08-29 00:02:03+02:00  -0.331299  0.285019  1.227526  False
2016-08-29 00:02:04+02:00  -4.749893  0.000000 -0.154644  False
2016-08-29 00:02:05+02:00  -5.631668 -0.285019 -1.173102  False
2016-08-29 00:02:06+02:00  -5.985641 -2.188751 -0.656784  False
2016-08-29 00:02:07+02:00   7.393875 -0.556488 -0.202017  False

[128 rows x 4 columns]
                                power     medfilt      wiener  event
ts                                                                  
2016-08-29 00:00:00+02:00  192.346436  191.722214  191.465496  False
2016-08-29 00:00:01+02:00  193.948425  192.346436  193.157759  False
2016-08-29 00:00:02+02:00  191.722214  192.629440  191.040204  False
2016-08-29 00:00:03+02:00  192.629440  192.845322  192.009410  False
2016-08-29 00:00:04+02:00  192.845322  193.948425  192.290783  False
2016-08-29 00:00:05+02:00  198.141647  196.824051  197.546743  False
2016-08-29 00:00:06+02:00  200.408112  196.824051  199.724253  False
2016-08-29 00:00:07+02:00  197.493973  196.824051  195.884853  False
2016-08-29 00:00:08+02:00  198.117142  196.824051  195.958355  False
2016-08-29 00:00:09+02:00  196.882980  196.882980  196.198053  False
2016-08-29 00:00:10+02:00  196.824051  197.493973  197.108823  False
2016-08-29 00:00:11+02:00  199.327103  197.543900  197.548321  False
2016-08-29 00:00:12+02:00  199.909897  197.543900  197.667935  False
2016-08-29 00:00:13+02:00  194.522797  197.543900  197.656859  False
2016-08-29 00:00:14+02:00  193.153259  197.543900  197.475574  False
2016-08-29 00:00:15+02:00  193.448959  197.688828  197.555403  False
2016-08-29 00:00:16+02:00  197.543900  197.688828  197.941759  False
2016-08-29 00:00:17+02:00  205.383759  197.975510  198.386925  False
2016-08-29 00:00:18+02:00  199.221909  198.691406  198.579025  False
2016-08-29 00:00:19+02:00  194.639542  198.691406  198.552655  False
2016-08-29 00:00:20+02:00  197.975510  197.975510  198.146070  False
2016-08-29 00:00:21+02:00  197.688828  197.975510  197.410109  False
2016-08-29 00:00:22+02:00  198.691406  197.975510  196.856557  False
2016-08-29 00:00:23+02:00  203.912491  197.975510  196.738328  False
2016-08-29 00:00:24+02:00  203.560471  197.975510  196.303789  False
2016-08-29 00:00:25+02:00  199.705551  197.688828  195.247121  False
2016-08-29 00:00:26+02:00  198.931549  194.639542  194.907073  False
2016-08-29 00:00:27+02:00  193.811127  197.688828  195.162287  False
2016-08-29 00:00:28+02:00  183.483383  194.121185  194.578579  False
2016-08-29 00:00:29+02:00  184.849976  193.811127  194.241124  False
...                               ...         ...         ...    ...
2016-08-29 00:01:38+02:00  203.429825  195.698151  194.721270  False
2016-08-29 00:01:39+02:00  197.857727  193.412216  194.381910  False
2016-08-29 00:01:40+02:00  196.703094  193.412216  193.944700  False
2016-08-29 00:01:41+02:00  193.412216  193.412216  194.314710  False
2016-08-29 00:01:42+02:00  193.412216  193.412216  194.978615  False
2016-08-29 00:01:43+02:00  185.623596  193.412216  195.379258  False
2016-08-29 00:01:44+02:00  192.092636  193.412216  195.478599  False
2016-08-29 00:01:45+02:00  191.107559  193.412216  195.504876  False
2016-08-29 00:01:46+02:00  190.702820  193.412216  194.654426  False
2016-08-29 00:01:47+02:00  193.654877  193.264954  193.453622  False
2016-08-29 00:01:48+02:00  204.706253  192.261154  192.741250  False
2016-08-29 00:01:49+02:00  205.656723  192.261154  192.910269  False
2016-08-29 00:01:50+02:00  193.264954  192.261154  193.095534  False
2016-08-29 00:01:51+02:00  192.261154  193.264954  193.606782  False
2016-08-29 00:01:52+02:00  198.687485  193.292313  193.705762  False
2016-08-29 00:01:53+02:00  190.673080  193.577332  194.225417  False
2016-08-29 00:01:54+02:00  179.845673  193.654877  195.040956  False
2016-08-29 00:01:55+02:00  186.017502  195.947510  195.209614  False
2016-08-29 00:01:56+02:00  195.947510  195.853455  194.619427  False
2016-08-29 00:01:57+02:00  196.191193  193.577332  193.649217  False
2016-08-29 00:01:58+02:00  193.292313  193.577332  193.129679  False
2016-08-29 00:01:59+02:00  193.577332  193.577332  192.278019  False
2016-08-29 00:02:00+02:00  198.902390  193.292313  191.490862  False
2016-08-29 00:02:01+02:00  202.935898  193.292313  191.360144  False
2016-08-29 00:02:02+02:00  196.184753  193.292313  191.930079  False
2016-08-29 00:02:03+02:00  195.853455  193.577332  193.157605  False
2016-08-29 00:02:04+02:00  191.103561  193.577332  193.002961  False
2016-08-29 00:02:05+02:00  185.471893  193.292313  191.829859  False
2016-08-29 00:02:06+02:00  179.486252  191.103561  191.173075  False
2016-08-29 00:02:07+02:00  186.880127  190.547073  190.971058  False

[128 rows x 4 columns]

In [ ]:


In [ ]: